---
title: "GOV 2001/STAT E-200: Group Problem Set 6"
author: "Due October 22, 2022"
output:
  pdf_document: default
  word_document: default
  html_document: default
header-includes:
- \usepackage[labelformat = empty]{caption}
- \usepackage[textfont = bf]{caption}
- \usepackage{graphicx}
- \usepackage{float}
- \usepackage{dcolumn}
fig_width: 3
fig_height: 2
urlcolor: blue
---
Tomonoshin Aoki
Group members: Wyatt, Motunrayo


setwd("C:/Users/taoki/Documents/US/Harvard Chan/fall/GOV 2001 Quantitative Social Science Methods I/collaboration/PS6/dataverse_files")

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

Information about the paper:

Original paper title: The politics of pain: Medicaid expansion, the ACA and the opioid epidemic

Dataverse link: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/SUPEL1&version=1.0
Journal published: Journal of Public Policy 




```{r}
rm(list=ls())
#had to install US map
library(usmap)
library(readr)
library(haven)
library(foreign)
library(tidyr)
library(dplyr)
library(rdrobust) 
library(rddensity)
library(lmtest)
library(ggplot2)
library(dotwhisker)
library(tidyverse)
library(crosstable)
library(tinytex)
library(lmtest)
library(sandwich)

#setwd("/Users/jwyattkoma/Documents/Harvard Fall 2022 Coursework/GOV 2001 Stats/replication paper")


```






```{r}
###################################################
#       Loading the Data
###################################################

# Border sample of counties within 100 miles of opposing Medicaid expansion border
grdborder <- read.csv("opioid_analyses_data.csv")
# subset of original data for Republican-leaning states 
similarstates <- read.csv("redstates_analyses_data.csv")
# Data on when states expanded Medicaid
medicaidexpansionyear <- read_csv("medicaidexpansionyear.csv")
# Trump tweets about health
tweets <- read.csv("trump_electionyeartweets.csv")
# NYT Opioid Data
nyt <- read.csv("nyt_opioid_mentions.csv")

#county-level election data
swing <- read_csv("countypres_2000-2020.csv")

```


```{r}
#rename county FIPS to FIPS
swing<-swing %>% 
  rename(fips=county_fips)

#create a variable for the political party that has the max number of votes in the election
swing<-swing %>% 
  group_by(year,fips) %>% 
  summarise(mainparty=.$party[which.max(candidatevotes)])



#create a new variable by year for the party that got the main number of Pres votes in each county
swing2000<-swing %>% 
  filter(year==2000) %>% 
  mutate(party2000=mainparty) %>% 
  ungroup() %>% 
  select(fips,party2000)

swing2004<-swing %>% 
  filter(year==2004) %>% 
  mutate(party2004=mainparty) %>% 
  ungroup() %>% 
  select(fips,party2004)

swing2008<-swing %>% 
  filter(year==2008) %>% 
  mutate(party2008=mainparty) %>% 
  ungroup() %>% 
  select(fips,party2008)

swing2012<-swing %>% 
  filter(year==2012) %>% 
  mutate(party2012=mainparty) %>% 
  ungroup() %>% 
  select(fips,party2012)

swing2016<-swing %>% 
  filter(year==2016) %>% 
  mutate(party2016=mainparty) %>% 
  ungroup() %>% 
  select(fips,party2016)

swing2020<-swing %>% 
  filter(year==2020) %>% 
  mutate(party2020=mainparty) %>% 
  ungroup() %>% 
  select(fips,party2020)

#merge objects that include counties by election years by county FIPS code
swing_join<-left_join(swing2000,swing2004,by="fips")
swing_join<-left_join(swing_join,swing2008,by="fips")
swing_join<-left_join(swing_join,swing2012,by="fips")
swing_join<-left_join(swing_join,swing2016,by="fips")
swing_join<-left_join(swing_join,swing2020,by="fips")

###
###create a new variable in an election year compared to next election year to determine swing status
###Ignore green party and other party for simplicity
###

swing_join<-swing_join %>% 
  mutate("swing2000to2004"=case_when(
    party2000=="REPUBLICAN"&party2004=="REPUBLICAN" ~ "RR",
    party2000=="DEMOCRAT"&party2004=="DEMOCRAT" ~ "DD",
    party2000=="DEMOCRAT"&party2004=="REPUBLICAN" ~ "DR",
    party2000=="REPUBLICAN"&party2004=="DEMOCRAT" ~ "RD",
    TRUE ~"other"
  ))

swing_join<-swing_join %>% 
  mutate("swing2004to2008"=case_when(
    party2004=="REPUBLICAN"&party2008=="REPUBLICAN" ~ "RR",
    party2004=="DEMOCRAT"&party2008=="DEMOCRAT" ~ "DD",
    party2004=="DEMOCRAT"&party2008=="REPUBLICAN" ~ "DR",
    party2004=="REPUBLICAN"&party2008=="DEMOCRAT" ~ "RD",
    TRUE ~"other"
  ))

swing_join<-swing_join %>% 
  mutate("swing2008to2012"=case_when(
    party2008=="REPUBLICAN"&party2012=="REPUBLICAN" ~ "RR",
    party2008=="DEMOCRAT"&party2012=="DEMOCRAT" ~ "DD",
    party2008=="DEMOCRAT"&party2012=="REPUBLICAN" ~ "DR",
    party2008=="REPUBLICAN"&party2012=="DEMOCRAT" ~ "RD",
    TRUE ~"other"
  ))

swing_join<-swing_join %>% 
  mutate("swing2012to2016"=case_when(
    party2012=="REPUBLICAN"&party2016=="REPUBLICAN" ~ "RR",
    party2012=="DEMOCRAT"&party2016=="DEMOCRAT" ~ "DD",
    party2012=="DEMOCRAT"&party2016=="REPUBLICAN" ~ "DR",
    party2012=="REPUBLICAN"&party2016=="DEMOCRAT" ~ "RD",
    TRUE ~"other"
  ))

swing_join<-swing_join %>% 
  mutate("swing2016to2020"=case_when(
    party2016=="REPUBLICAN"&party2020=="REPUBLICAN" ~ "RR",
    party2016=="DEMOCRAT"&party2020=="DEMOCRAT" ~ "DD",
    party2016=="DEMOCRAT"&party2020=="REPUBLICAN" ~ "DR",
    party2016=="REPUBLICAN"&party2020=="DEMOCRAT" ~ "RD",
    TRUE ~"other"
  ))


#change FIPS to integer in swing_join dataset to allow merge to main file
swing_join$fips <- as.integer(swing_join$fips)

#Merge county-level indicator for swing status to be in main analysis the author create
grdborder<-left_join(grdborder,swing_join,by="fips")


###
##Take a look at the swing counties from 08-2012
###
table(grdborder$swing2008to2012)

###
##create a new variable rolling up DR and RD swing counties to overall for sample size issues
### (only 4 RD counties)

grdborder<-grdborder %>% 
  mutate("swing_indicator_08to12"=case_when(
    swing2008to2012== "DD" | swing2008to2012== "RR" ~ 'non-swing county',
        swing2008to2012== "DR" | swing2008to2012== "RD" ~ 'swing county',
          TRUE ~"other"  ))

table(grdborder$swing2008to2012,grdborder$swing_indicator_08to12)

# made  the swing counties from 04-2008
grdborder<-grdborder %>% 
  mutate("swing_indicator_04to08"=case_when(
    swing2004to2008== "DD" | swing2004to2008== "RR" ~ 'non-swing county04to08',
        swing2004to2008== "DR" | swing2004to2008== "RD" ~ 'swing county04to08',
          TRUE ~"other"  ))

table(grdborder$swing_indicator_04to08)
table(grdborder$swing2004to2008,grdborder$swing_indicator_04to08)


grdborder<-grdborder %>% 
  mutate("swing"=case_when(
    swing2004to2008== "DD"&swing2008to2012== "DD" ~ 'always_D',
    swing2004to2008== "RR"&swing2008to2012== "RR" ~ 'always_R',
          TRUE ~'swing'  ))

grdborder<-grdborder %>% 
  mutate("always_D" =case_when(
    swing== "always_D" ~ 1,
          TRUE ~ 0  ))

grdborder<-grdborder %>% 
  mutate("always_R" =case_when(
    swing== "always_R" ~ 1,
          TRUE ~ 0  ))

grdborder<-grdborder %>% 
  mutate("ind_swing" =case_when(
    swing== "swing" ~ 1,
          TRUE ~ 0  ))



```



```{r}
###################################################
#                 Maps 
###################################################
# Expansion Status Maps for Full and Border Samples

# Prepping data for plots (defining when a case expanded and whether included in border)
medicaidexpansionyear$expanded_15 <- NA
medicaidexpansionyear$expanded_15[medicaidexpansionyear$year_expanded<=2015] <- 1
medicaidexpansionyear$expanded_15[medicaidexpansionyear$year_expanded>=2016] <- 0
medicaidexpansionyear$expanded_15[medicaidexpansionyear$medicaid_expansion==0] <- 0
medicaidexpansionyear$state <- medicaidexpansionyear$state_abv
medicaidexpansionyear$Expansion[medicaidexpansionyear$expanded_15==1] <- "Yes"
medicaidexpansionyear$Expansion[medicaidexpansionyear$expanded_15==0] <- "No"


### Full Sample Map 

usmap::plot_usmap(data = medicaidexpansionyear, values = "Expansion")  + 
  scale_fill_manual(name = "Expanded Medicaid", 
                    labels = c("Yes" = "Yes", 
                               "No" = "No"),
                    values = c("Yes" = "#2D2D2D", 
                               "No" = "#D0D0D0"), na.translate=FALSE) + theme(legend.position="right") 


## Border Sample Map 

usmap::plot_usmap(include = c("AR", "AZ", "CO", "IA", "ID", "IL", "KS", "KY", "LA", "MD", "ME", "MI",
                       "MN", "MO", "MS", "MT", "ND", "NM", 
                       "NE", "NH", "NV", "OK", "OR", "SD", "TN", "TX", "UT",
                       "VA", "WA", "WI", "WV", "WY"),
           data = medicaidexpansionyear, values = "expanded_15") +  scale_fill_continuous(low="#D0D0D0", high="#2D2D2D",name = "Expanded Medicaid (2015)", label = scales::comma) + 
  theme(legend.position = "none")
```


```{r}
##########################################
#    Research Design Assumption Plots
########################################## 

grdborder$Expansion <- as.factor(grdborder$z)

# Pre-trend discontinuity in partisanship? No! 
p <- ggplot(grdborder, aes(x = x, y = dm2008, color = Expansion)) +
  geom_point(size = 0.5, alpha = 0.5) + 
  # Add a line based on a linear model for the people scoring less than 70
  geom_smooth(data = filter(grdborder, x <= 0), method = "lm") +
  # Add a line based on a linear model for the people scoring 70 or more
  geom_smooth(data = filter(grdborder, x > 0), method = "lm") +
  geom_vline(xintercept = 0) +
  labs(x = "Distance to Medicaid Border", y = "Democratic Two-Party Vote (2008)", color = "Expansion")
p1 <- p +   scale_colour_grey()

p1

# Pre-trend discontinuity in opioids? No! 
p <- ggplot(grdborder, aes(x = x, y = opioidrate_10, color = Expansion)) +
  geom_point(size = 0.5, alpha = 0.5) + 
  # Add a line based on a linear model for the people scoring less than 70
  geom_smooth(data = filter(grdborder, x <= 0), method = "lm") +
  # Add a line based on a linear model for the people scoring 70 or more
  geom_smooth(data = filter(grdborder, x > 0), method = "lm") +
  geom_vline(xintercept = 0) +
  labs(x = "Distance to Medicaid Border", y = "Opioid Prescription Rate (2010)", color = "Expansion")
p1 <- p +   scale_colour_grey()

p1
```












```{r}
###################################################
#            Elections Analyses 
###################################################
#install.packages('estimate')
#library(estimate)
library(stargazer)

####
#### create subset of the data (d/r/swing counties) to run the regressions on below
####

grdborder_demsubset <- grdborder %>%
                          filter(always_D == 1) %>%
                            ungroup() 
table(grdborder_demsubset$swing)

grdborder_repsubset <- grdborder %>%
                          filter(swing == "always_R") %>%
                            ungroup() 
table(grdborder_repsubset$swing)

grdborder_swingsubset <- grdborder %>%
                          filter(ind_swing == 1)  %>%
                            ungroup() 
table(grdborder_swingsubset$swing)


#######################Column 1

######
###### ORIGINAL ANALYSIS
######
# Code used to produce main regression table in the main text 

# Opioid Increase (Regression Table 1, Column 1)
med <- lm(shift_16_12 ~  increase*z + x*z + 
            I(x^2)*z  +dm2004 +  stateabbr, data = grdborder, weights=vap2016)
#summary(med)
# heteroskedasticity-consistent standard errors 
#med<- coeftest(med, vcov = vcov(med, type = "HC0"))
#med2


######
###### REPLICATION ANALYSIS (with swing status indicator added that identifies 2004-2012 swing counties)
######

# Opioid Increase (Regression Table 1, Column 1) - Republican county subset
med_col1_swing_alwaysr <- lm(shift_16_12 ~  increase*z+ x*z + 
            I(x^2)*z  +dm2004 +  stateabbr, data = grdborder_repsubset, weights=vap2016)

# Opioid Increase (Regression Table 1, Column 1)- Democratic county subset
med_col1_swing_alwaysd <- lm(shift_16_12 ~  increase*z + x*z + 
            I(x^2)*z  +dm2004 +  stateabbr, data = grdborder_demsubset, weights=vap2016)

# Opioid Increase (Regression Table 1, Column 1)- Swing county subset
med_col1_swing_ind_swing<- lm(shift_16_12 ~  increase*z + x*z + 
            I(x^2)*z  +dm2004 +  stateabbr, data = grdborder_swingsubset, weights=vap2016)

stargazer(
      #Column for original data from journal replication
        med, 
      #Column for regression adjusted for counties that were always republican 
        med_col1_swing_alwaysr,
      #Column for regression adjusted for counties that were always Dem 
        med_col1_swing_alwaysd,
      #Column for regression adjusted for counties that were ever swing counties  
        med_col1_swing_ind_swing, 
        type = "text", 
        #omit fixed effects for state from output bc it looks bad
        omit = c('stateabbr', 'x', 'I(x^2)', 'dm2004'),
         covariate.labels = c(
           "Opioid increase", 
           "Medicaid expansion", 
          "Opioid increase X Medicaid expansion"),
        dep.var.caption = "Change in democratic vote from 2012 to 2016",
         column.labels = c("Original paper regression", "Always Republican counties", "Always Democrat", "Ever swing counties"),
         dep.var.labels = "Democratic vote share in 2016 minus Democratic vote share in 2012",
         title="Table 1. Comparing original analysis with extension work",
         notes= "Table 1 presents linear regression models adjusted for state fixed effects, a measure of distance to nearby non-expansion state, a polynomial term of distance to non-expansion state, and 2004 democratic vote share (not shown in output but available upon request). ModelColumn 1 presents original replication data from Michael Shepherd 2022; Columns 2 through 4 present extension analysis on subset of counties that were always Republican, always Democrat, or changed from Democrat to Republican in the 2008-2012 presidential elections (See methods for more information on classification).",
         #change order of rows slightly so that relevant rows are next to eachother for readability
        # order=c(1,2,3,4,5,6,7,8,10,12,9,11,13,14),
         digits=3, 
         out="draft1_columns as subsets.htm"
         )


```


 ~  increase*z*always_R + x*z + 
            I(x^2)*z  +dm2004 +  stateabbr

```{r}
#descriptive analysis
library(arsenal) 
library(magrittr)
library(kableExtra)
library(table1)
library(gt)


grdborder$Expansion <- 
  factor(grdborder$Expansion, levels=c(1,0),
         labels=c("Expansion", 
                  "Non expantion"))




label(grdborder$shift_16_12)<-"Democratic Two-Party Vote (2016–2012)"
label(grdborder$Expansion)<-"Medicaid Expansion"
label(grdborder$dm2004)<-"Democratic Vote (2004)"
label(grdborder$increase)<-"Opioid Increase"
label(grdborder$pop)<-"Population"
label(grdborder$pop.density)<-"Population density"
label(grdborder$unemployment)<-"Unemployment rate(%)"
label(grdborder$median.income)<-"Median income($/year)"

grdborder$swingx <- 
  factor(grdborder$swing, levels=c( "always_R", "always_D", "swing"),
         labels=c("Always Republican counties",  "Always Democratic counties","Swing counties"))

table_one <- table1( ~ shift_16_12+Expansion+dm2004+increase+pop+pop.density+unemployment+median.income|swingx, data = grdborder,output = "html") 


table_one

```





```{r}
#making cross table
grdborder$Expansion2[grdborder$z==1] <- "Yes"
grdborder$Expansion2[grdborder$z==0] <- "No"

df<-tapply(drop_na(grdborder)$shift_16_12, list(drop_na(grdborder)$Expansion2, drop_na(grdborder)$swingx), mean)
df<-as.data.frame(df)
df

```



```{r}
###################################################
#                 Maps 
###################################################
# Expansion Status Maps for Full and Border Samples

# Prepping data for plots (defining when a case expanded and whether included in border)
medicaidexpansionyear$expanded_15 <- NA
medicaidexpansionyear$expanded_15[medicaidexpansionyear$year_expanded<=2015] <- 1
medicaidexpansionyear$expanded_15[medicaidexpansionyear$year_expanded>=2016] <- 0
medicaidexpansionyear$expanded_15[medicaidexpansionyear$medicaid_expansion==0] <- 0
medicaidexpansionyear$state <- medicaidexpansionyear$state_abv
medicaidexpansionyear$Expansion[medicaidexpansionyear$expanded_15==1] <- "Yes"
medicaidexpansionyear$Expansion[medicaidexpansionyear$expanded_15==0] <- "No"


### Full Sample Map 

usmap::plot_usmap(data = medicaidexpansionyear, values = "Expansion")  + 
  scale_fill_manual(name = "Expanded Medicaid", 
                    labels = c("Yes" = "Yes", 
                               "No" = "No"),
                    values = c("Yes" = "#2D2D2D", 
                               "No" = "#D0D0D0"), na.translate=FALSE) + theme(legend.position="right") 



plot_usmap(regions = "counties") + 
  labs(title = "US Counties",
       subtitle = "This is a blank map of the counties of the United States.") + 
  theme(panel.background = element_rect(color = "black", fill = "lightblue"))


df <- data.frame(
  fips = c(grdborder$fips),
  values = c(as.factor(grdborder$swingx)
             )
)

plot_usmap(data = df)+
  scale_fill_manual(name = "Counties", 
                    values = c("Always Republican counties" = "red", 
                               "Always Democratic counties" = "blue",
                               "Swing counties" = "purple"), na.translate=TRUE)

```


```{r}
##########################################
#    Research Design Assumption Plots
########################################## 

grdborder$Expansion <- as.factor(grdborder$z)

# Pre-trend discontinuity in partisanship? No! 
p <- ggplot(grdborder, aes(x = x, y = dm2008, color = swingx)) +
      scale_color_manual(values = c('red', 'blue', 'purple'))+
  geom_point(size = 0.5, alpha = 0.5) + 
  # Add a line based on a linear model for the people scoring less than 70
  geom_smooth(data = filter(grdborder, x <= 0), method = "lm") +
  # Add a line based on a linear model for the people scoring 70 or more
  geom_smooth(data = filter(grdborder, x > 0), method = "lm") +
  geom_vline(xintercept = 0) +
  labs(x = "Distance to Medicaid Border", y = "Democratic Two-Party Vote (2008)", color = "swing")+
 annotate("rect", xmin = 0, xmax = 100, ymin = 0, ymax = 100,
           alpha = .1,fill = "green")+ 

   geom_label(
    label="Non expansion states", 
    x=-50,
    y=85,
    label.padding = unit(0.55, "lines"), # Rectangle size around label
    label.size = 0.35,
    color = "black",
    fill="grey"
  )+ 

   geom_label(
    label="Expansion states", 
    x=50,
    y=85,
    label.padding = unit(0.55, "lines"), # Rectangle size around label
    label.size = 0.35,
    color = "black",
    fill="#69b3a2"
  )


p1 <- p 

p1


grdborder$Expansion <- as.factor(grdborder$z)

# Pre-trend discontinuity in partisanship? No! 
p2 <- ggplot(grdborder, aes(x = x, y = shift_16_12, color = swingx)) +
  scale_color_manual(values = c('red', 'blue', 'purple'))+
  geom_point(size = 0.5, alpha = 0.5) + 
  # Add a line based on a linear model for the people scoring less than 70
  geom_smooth(data = filter(grdborder, x <= 0), method = "lm") +
  # Add a line based on a linear model for the people scoring 70 or more
  geom_smooth(data = filter(grdborder, x > 0), method = "lm") +
  geom_vline(xintercept = 0) +
  labs(x = "Distance to Medicaid Border", y = "Democratic Two-Party Vote (2016–2012)", color = "swing")+
 annotate("rect", xmin = 0, xmax = 100, ymin = -25, ymax = 10,
           alpha = .1,fill = "green")+ 

   geom_label(
    label="Non expansion states", 
    x=-50,
    y=5,
    label.padding = unit(0.55, "lines"), # Rectangle size around label
    label.size = 0.35,
    color = "black",
    fill="grey"
  )+ 

   geom_label(
    label="Expansion states", 
    x=50,
    y=5,
    label.padding = unit(0.55, "lines"), # Rectangle size around label
    label.size = 0.35,
    color = "black",
    fill="#69b3a2"
  )

p3 <- p2 

p3
```








